From 330234ad5195a7e3d12835bfeec6f020adb11565 Mon Sep 17 00:00:00 2001 From: Jonathan Dieter Date: Wed, 11 Jul 2018 14:35:25 +0100 Subject: [PATCH] Move buzhash into lib/ and automatically chunk by default Signed-off-by: Jonathan Dieter --- src/{ => lib}/buzhash/LICENSE | 0 src/{ => lib}/buzhash/buzhash.c | 40 ++++++++++++++------ src/{ => lib}/buzhash/buzhash.h | 7 +++- src/lib/buzhash/meson.build | 1 + src/lib/comp/comp.c | 67 +++++++++++++++++++++++---------- src/lib/meson.build | 1 + src/lib/zck.c | 11 +++++- src/lib/zck_private.h | 3 ++ src/meson.build | 2 +- 9 files changed, 98 insertions(+), 34 deletions(-) rename src/{ => lib}/buzhash/LICENSE (100%) rename src/{ => lib}/buzhash/buzhash.c (83%) rename src/{ => lib}/buzhash/buzhash.h (55%) create mode 100644 src/lib/buzhash/meson.build diff --git a/src/buzhash/LICENSE b/src/lib/buzhash/LICENSE similarity index 100% rename from src/buzhash/LICENSE rename to src/lib/buzhash/LICENSE diff --git a/src/buzhash/buzhash.c b/src/lib/buzhash/buzhash.c similarity index 83% rename from src/buzhash/buzhash.c rename to src/lib/buzhash/buzhash.c index 06ecc9a..c1fc399 100644 --- a/src/buzhash/buzhash.c +++ b/src/lib/buzhash/buzhash.c @@ -94,18 +94,36 @@ const uint32_t buzhash_table[] = { 0x7bf7cabc, 0xf9c18d66, 0x593ade65, 0xd95ddf11, }; -uint32_t buzhash_setup(buzHash *b, const char *s, size_t n) { - b->h = 0; - b->window_size = n; - for (size_t i = 1; i < n; i++, s++) - b->h ^= rol32 (buzhash_table[(size_t) (*s)], n - i); - b->h ^= buzhash_table[(size_t) (*s)]; - return b->h; -} - -uint32_t buzhash_update (buzHash *b, const char *s) { +uint32_t buzhash_update (buzHash *b, const char *s, size_t window) { + if(b->window == NULL || b->window_size != window) { + if(b->window) + free(b->window); + b->window = calloc(1, window); + b->window_loc = 0; + b->window_fill = 0; + b->window_size = window; + b->h = 0; + } + if(b->window_fill < b->window_size) { + b->window[b->window_fill] = *s; + b->window_fill++; + if(b->window_fill < 
b->window_size) { + b->h ^= rol32 (buzhash_table[(size_t) (*s)], window - b->window_fill); + return 1; + } else { + b->h ^= buzhash_table[(size_t) (*s)]; + return b->h; + } + } b->h = rol32 (b->h, 1) ^ - rol32 (buzhash_table[(size_t) *(s-b->window_size)], b->window_size) ^ + rol32 (buzhash_table[(size_t) b->window[b->window_loc]], b->window_size) ^ buzhash_table[(size_t) *s]; + b->window[b->window_loc++] = *s; + b->window_loc %= b->window_size; return b->h; } + +void buzhash_reset (buzHash *b) { + free(b->window); + b->window = NULL; +} diff --git a/src/buzhash/buzhash.h b/src/lib/buzhash/buzhash.h similarity index 55% rename from src/buzhash/buzhash.h rename to src/lib/buzhash/buzhash.h index 9ea9dd8..06f6917 100644 --- a/src/buzhash/buzhash.h +++ b/src/lib/buzhash/buzhash.h @@ -8,9 +8,12 @@ typedef struct buzHash { uint32_t h; int window_size; + char *window; + int window_loc; + int window_fill; } buzHash; -uint32_t buzhash_setup(buzHash *b, const char *s, size_t n); -uint32_t buzhash_update (buzHash *b, const char *s); +uint32_t buzhash_update (buzHash *b, const char *s, size_t window); +void buzhash_reset (buzHash *b); #endif diff --git a/src/lib/buzhash/meson.build b/src/lib/buzhash/meson.build new file mode 100644 index 0000000..e3a1917 --- /dev/null +++ b/src/lib/buzhash/meson.build @@ -0,0 +1 @@ +lib_sources += files('buzhash.c') diff --git a/src/lib/comp/comp.c b/src/lib/comp/comp.c index 1a3fe9e..ae4d73a 100644 --- a/src/lib/comp/comp.c +++ b/src/lib/comp/comp.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include "zck_private.h" @@ -165,6 +166,31 @@ static ssize_t comp_end_dchunk(zckCtx *zck, int use_dict, size_t fd_size) { return rb; } +static ssize_t comp_write(zckCtx *zck, const char *src, const size_t src_size) { + VALIDATE_WRITE_SIZE(zck); + + if(!zck->comp.started && !comp_init(zck)) + return -1; + + if(src_size == 0) + return 0; + + char *dst = NULL; + size_t dst_size = 0; + if(zck->comp.compress(&(zck->comp), src, src_size, 
&dst, &dst_size, 1) < 0) + return -1; + if(dst_size > 0 && !write_data(zck->temp_fd, dst, dst_size)) { + free(dst); + return -1; + } + if(!index_add_to_chunk(zck, dst, dst_size, src_size)) { + free(dst); + return -1; + } + free(dst); + return src_size; +} + int comp_init(zckCtx *zck) { VALIDATE(zck); @@ -461,27 +487,29 @@ const char PUBLIC *zck_comp_name_from_type(int comp_type) { } ssize_t PUBLIC zck_write(zckCtx *zck, const char *src, const size_t src_size) { - VALIDATE_WRITE_SIZE(zck); - - if(!zck->comp.started && !comp_init(zck)) - return -1; - - if(src_size == 0) - return 0; - - char *dst = NULL; - size_t dst_size = 0; - if(zck->comp.compress(&(zck->comp), src, src_size, &dst, &dst_size, 1) < 0) - return -1; - if(dst_size > 0 && !write_data(zck->temp_fd, dst, dst_size)) { - free(dst); - return -1; + if(zck->manual_chunk) + return comp_write(zck, src, src_size); + + const char *loc = src; + size_t loc_size = src_size; + for(size_t i=0; i<loc_size; ) { + if((buzhash_update(&(zck->buzhash), loc+i, zck->buzhash_width) & + zck->buzhash_bitmask) == 0) { + if(comp_write(zck, loc, i) != i) + return -1; + zck_log(ZCK_LOG_DEBUG, "Automatically ending chunk\n"); + if(zck_end_chunk(zck) < 0) + return -1; + loc += i; + loc_size -= i; + i = 0; + buzhash_reset(&(zck->buzhash)); + } else { + i++; + } } - if(!index_add_to_chunk(zck, dst, dst_size, src_size)) { - free(dst); + if(loc_size > 0 && comp_write(zck, loc, loc_size) != loc_size) return -1; - } - free(dst); return src_size; } @@ -512,6 +540,7 @@ ssize_t PUBLIC zck_end_chunk(zckCtx *zck) { free(dst); return -1; } + zck_log(ZCK_LOG_DEBUG, "Finished chunk size: %lu\n", data_size); free(dst); return data_size; } diff --git a/src/lib/meson.build b/src/lib/meson.build index df18c4b..a8647b2 100644 --- a/src/lib/meson.build +++ b/src/lib/meson.build @@ -1,5 +1,6 @@ lib_sources = files() add_global_arguments('-fvisibility=hidden', language : 'c') +subdir('buzhash') subdir('comp') subdir('hash') subdir('index') diff --git a/src/lib/zck.c b/src/lib/zck.c index 
225679c..9cadc8e 100644 --- a/src/lib/zck.c +++ b/src/lib/zck.c @@ -120,6 +120,14 @@ static char *ascii_checksum_to_bin (char *checksum) { return raw_checksum; } +static void update_buzhash_bits(zckCtx *zck) { + int s=1; + for(int i=0; i<zck->buzhash_match_bits; i++) + s *= 2; + s -= 1; + zck->buzhash_bitmask = s; +} + int get_tmp_fd() { int temp_fd; char *fname = NULL; @@ -347,7 +355,8 @@ zckCtx PUBLIC *zck_create() { zck->prep_hash_type = -1; zck->prep_hdr_size = -1; zck->buzhash_width = DEFAULT_BUZHASH_WIDTH; - zck->buzhash_match_bits = (2^DEFAULT_BUZHASH_BITS) - 1; + zck->buzhash_match_bits = DEFAULT_BUZHASH_BITS; + update_buzhash_bits(zck); return zck; } diff --git a/src/lib/zck_private.h b/src/lib/zck_private.h index c68c505..0306171 100644 --- a/src/lib/zck_private.h +++ b/src/lib/zck_private.h @@ -4,6 +4,7 @@ #include #include #include +#include "buzhash/buzhash.h" #define BUF_SIZE 32768 /* Maximum string length for a compressed size_t */ @@ -208,8 +209,10 @@ typedef struct zckCtx { char *data; size_t data_size; + buzHash buzhash; int buzhash_width; int buzhash_match_bits; + int buzhash_bitmask; int manual_chunk; } zckCtx; diff --git a/src/meson.build b/src/meson.build index a572dac..01978cb 100644 --- a/src/meson.build +++ b/src/meson.build @@ -1,7 +1,7 @@ add_global_arguments('-Wunused-result', language : 'c') add_global_arguments('-std=gnu99', language : 'c') subdir('lib') -executable('zck', ['zck.c', 'util_common.c', 'buzhash/buzhash.c'], include_directories: inc, link_with: zcklib, install: true) +executable('zck', ['zck.c', 'util_common.c'], include_directories: inc, link_with: zcklib, install: true) executable('unzck', ['unzck.c', 'util_common.c'], include_directories: inc, link_with: zcklib, install: true) executable('zckdl', ['zck_dl.c', 'util_common.c'], include_directories: inc, dependencies: curl_dep, link_with: zcklib, install: true) executable('zck_read_header', ['zck_read_header.c', 'util_common.c'], include_directories: inc, link_with: zcklib, 
install: true) -- 2.30.2